// Session settings: do not stop output at -more- prompts, and turn -pause- off
// so the script runs from start to finish without user interaction.
set more off
pause off

/**********************************************************************
* Cheaper Faster and More than Good Enough: 
Is GPS the New Gold Standard in Land Area Measurement -- SRM

** File prepares data from Tanzania Measuring Cassava Productivity for analysis.
** Data files available at: http://go.worldbank.org/1KXFSOL9U0

Note: All variables that could potentially be used to identify survey 
respondents have been withheld from public data release. Syntax that 
references these variables may also have been hidden in order to protect 
respondent confidentiality.	Shapemetric data for this study has not been
made public, and therefore is not included in this syntax.
***********************************************************************/

// Folder macros used to build file paths.
// The root folder is redacted ("XXX") in the public release, so as written
// $zanzi is defined empty; point it at the local data directory before running.
global zanzi 		/*XXX*/
// Sub-folders below the root. In the visible code only $zanzi and $zanzihh are
// used in live commands; the r1a/r1b/r2 macros are presumably for diary rounds
// handled elsewhere — TODO confirm.
global zanzir1a 	"$zanzi/diary/r1a"
global zanzir1b 	"$zanzi/diary/r1ar1b"
global zanzir2 		"$zanzi/diary/r1ar2"
global zanzihh 		"$zanzi/diary/hh_survey"
// Location of the shape-metrics file; the data are private, so this is left empty
// and the only merge that uses it is commented out.
global shapemetrics	/*private data*/


// Load the plot-level area measurement form and strip out double-entered
// records, working towards hhid + plotid as the unique plot key.
use "$zanzi/area_measurement/AreaMeasurement.dta", clear
// clean up below was conducted on an early-release data file, and is based
// on some variables that are not released publicly due to anonymity constraints.
// Code replaced with "XX" where anonymized.

	ren am4 hhid
	ren am7 plotid
	
	//Duplicates
	// Diagnostic pass only: tag records sharing hhid + plotid, tabulate, discard the tag
	duplicates tag hhid plotid, gen(d) 
	tab d // 74 duplicates
	drop d
	
	// Records that also agree on am0/am0u/am14/am18 (renamed further down to
	// SR_quant/SR_unit/CRarea/GPSarea) are treated as double entry; the first copy is kept
	duplicates tag hhid plotid am0 am0u am14 am18, gen(d2)
	tab d2 // 16 dups have same SR area, GPS area, CR area(double entered)
	duplicates drop hhid plotid am0 am0u am14 am18, force
	drop d2
	
	// Diagnostic pass: how many duplicate keys remain
	duplicates tag hhid plotid, gen(d3) 
	tab d3 // 58 duplicates
	drop d3
	
		//Looks like some were entered 2x but one of the data entry persons truncates area to one decimal
		// am18 is converted to a string and split at the decimal point; gps1 holds the text
		// before the point, so copies that differ only in the decimals are matched on gps1.
		// am18 stays a string until it is destring-ed again in the area conversion section.
		tostring am18, replace
		split am18, gen(gps) parse(.)
		duplicates tag hhid plotid am9s am0 am0u gps1, gen(d4)
		tab d4 // 8
		duplicates drop hhid plotid am9s am0 am0u gps1, force
		drop d4
	
		//Spot Check
		// Single record removed by hand, identified through am28c
		drop if hhid==2933 & plotid==1 & am28c==51
		
	// Remaining duplicate keys are resolved by hand, one observation at a time.
	// am1/am2 are renamed to district/ward further down; am6 holds plot names and am5
	// person names (both compared against strings below).
	// NOTE(review): am29a values such as 230913 look like DDMMYY measurement dates — confirm.
	duplicates tag hhid plotid am1 am2 am6, gen(d5) 
	tab d5 // 6 duplicates
		//Same plot name, assuming field was remeasured - keeping second measurement
		drop if hhid==2723 & plotid==1 & am29a==230913
		drop if hhid==2723 & plotid==2 & am29a==250913
		drop if hhid==3110 & plotid==1 & am29a==120913
	drop d5
	
	duplicates tag hhid plotid am1 am2, gen(d6) 
	tab d6 // 30 obs
		//Spot check remaining duplicates.  If appears to be re-measurement, keep second measurement
		// The rules below run in sequence: some renumber a plot or household first and a
		// later rule then refers to the new id, so their order must not be changed.
		drop if hhid==2321 & plotid==1 & am29a==240813
		drop if hhid==2703 & plotid==1 & am29a==240913
		drop if hhid==3534 & plotid==1 & am29a==180813
		drop if hhid==2234 & plotid==1 & am29a==220713
		drop if hhid==2411 & plotid==1 & am6=="KARIBU NA NYUMBA"
		drop if hhid==2702 & plotid==2 & am0==.
		replace plotid=2 if plotid==1 & hhid==2702 & am6=="MWANAMWANJA"
		replace plotid=1 if plotid==2 & hhid==730 & am6=="MTOFAANI"
		drop if hhid==2702 & plotid==2 & am6=="MGENIKAGE"
		drop if hhid==2703 & plotid==3 & am6=="GONGONI"
		drop if hhid==2712 & plotid==1 & am0==1
		drop if hhid==2830 & plotid==1 & am29a==211013
		replace plotid=2 if plotid==1 & hhid==2821 & am6=="FUMBI KUU"
		replace plotid=1 if plotid==2 & hhid==2821 & am6=="MBUYU NGOMBE"
		drop if hhid==2821 & am6=="SHAGAA" & am29a==300913
		replace plotid=4 if plotid==5 & hhid==2821 & am6=="likoni"
		replace plotid=3 if plotid==4 & hhid==2821 & am6=="SHAGAA"
		drop if plotid==2 & hhid==2908 & am0!=.75
		drop if plotid==2 & hhid==3503 & am0!=.75 // more errors found in this obs, also appears to be first measurement
		// Household ids corrected where the recorded id does not fit the named person
		replace hhid=3230 if hhid==330 & plotid==2 & am5!="MOHD OMAR"
		replace hhid=630 if hhid==1630 & plotid==1 & am5=="FATMA"
		replace hhid=3028 if hhid==2028 & plotid==2 & am5=="ABDALLA ALI"
		drop if hhid==3028 & plotid==2 & am29b==999999
		replace hhid=3027 if hhid==2027 & plotid==1 & am5=="MWALIMU ALI"
		drop if hhid==3027 & plotid==1 & am28b==40
		drop if hhid==3026 & plotid==1 & am6=="MKOMBWE"
		// NOTE(review): unlike the neighbouring rules, the next replace is not restricted to a
		// household; it renumbers every plot 3 named "NGUZUMO". Presumably hhid==3026 was
		// intended (the rules before and after target that household) — confirm.
		replace plotid=1 if plotid==3 & am6=="NGUZUMO"
		drop if plotid==5 & hhid==3026

	//NO CLEAR SOLUTION FOR THESE: PLOT NAMES DO NOT MATCH, 
	//SR AREAS ARE DIFFERENT BETWEEN QUESTIONNAIRE S4 and AREA FORM	
		drop if hhid==1204 & plotid==1
		drop if hhid==2208 & plotid==1
		drop if hhid==2323 & plotid==1
		drop if hhid==2821 & plotid==2 & am5=="IDDI JUMA" // person not in HH
		drop if hhid==332 & plotid==1 & am5=="ADAM KH" // --> incorrect HHID, need to dig further
	drop d6
		
// Stop with an error unless hhid + plotid now uniquely identifies every observation
isid hhid plotid	
count // 1951


*******************************
**Rename to match other sets **
*******************************

		// Give the area-form fields descriptive names (CR = compass-and-rope measurement,
		// SR = self-reported area).
		ren am14 CRarea
		ren am13 cr_perimeter
		ren am15 closingerr
		ren am17 gps_perimeter
		ren am18 GPSarea
		ren am19 gps_accuracy
		ren am20 num_satellite
		ren am21 treecover
		ren am22 weather
		ren am28a enum_id
		ren am29am int_month
		ren am29ay int_year
		ren am28b sup_id
		ren am0 SR_quant
		ren am0u SR_unit
		ren am1 district
		ren am2 ward
		ren am3 shehia
		
					//Clean Up
					// Variable names are written out in full so these rules do not depend on
					// -set varabbrev- or on no other variable sharing the same prefix.
					// Satellite counts: hand corrections for individual households
					replace num_satellite=16 if num_satellite==61 & hhid==2010
					replace num_satellite=18 if num_satellite==81 & hhid==2813
					replace num_satellite=18 if num_satellite==33 & hhid==3212
					replace num_satellite=18 if num_satellite==3 & hhid==3215
					replace num_satellite=19 if num_satellite==. & hhid==3229
					
					// GPS accuracy: 99 and 0 are set to missing, then every missing value becomes 3
					replace gps_accuracy=. if gps_accuracy==99 | gps_accuracy==0 // 10 obs
					replace gps_accuracy=3 if gps_accuracy==. & ///
						inlist(hhid, 3104, 3105, 3120, 3215, 3204, 3209, 3236) // many HH fields w/same
					replace gps_accuracy=3 if gps_accuracy==. // 4 obs, overwhelming average
					
					// Code 9 is set to missing for tree cover and weather (one household is
					// assigned tree cover 3 instead)
					replace treecover=3 if treecover==9 & hhid==3023
					replace treecover=. if treecover==9
					replace weather=. if weather==9

					replace closingerr=3.8 if closingerr==0 & hhid==3306 & plotid==1 // recalculated
					replace closingerr=1 if closingerr==0 & hhid==613 & plotid==1 // recalculated

******************************
**Convert Area Measurements **
******************************		

					//Clean Up
					// Zero and 99999 are set to missing for areas and perimeters, followed by
					// record-specific corrections of what appear to be misplaced decimals and
					// swapped fields.
					// NOTE(review): several rules test equality against decimal literals (for example
					// 62.6, 496.6, 381.3). If the variable is stored as float rather than double such a
					// test never matches — confirm storage types or compare via float().
					replace SR_quant=. if SR_quant==0 // 24 obs
					replace SR_unit=. if SR_unit==0 // 5
					replace SR_quant=.75 if SR_quant==75 & hhid==829 & plotid==2
					replace gps_perimeter=. if gps_perimeter==0 | gps_perimeter==99999
					replace cr_perimeter=. if cr_perimeter==0 | cr_perimeter==99999
					replace CRarea=. if CRarea==0 | CRarea==99999

					// GPSarea (am18) was converted to a string during de-duplication; make it numeric again
					destring GPSarea, replace
					replace GPSarea=. if GPSarea==0 | GPSarea==99999
					// Area value found in the perimeter field: move it to GPSarea and blank the perimeter
					replace GPSarea=2362.5 if gps_perimeter==2362.5 & GPSarea==3 & hhid==2729
					replace gps_perimeter=. if gps_perimeter==2362.5 & hhid==2729

					replace GPSarea=2881.8 if gps_perimeter>2000 & GPSarea==3 & hhid==2936
					replace gps_perimeter=. if gps_perimeter>2000 & hhid==2936
					replace GPSarea=2815 if GPSarea==28.15 & hhid==2913
					replace GPSarea=3013 if GPSarea==30.13 & hhid==801
					replace GPSarea=. if GPSarea==93 & gps_perimeter==93 & hhid==2002
					replace GPSarea=808.5 if GPSarea==88.5 & hhid==3109 & plotid==2
					replace GPSarea=1077.4 if GPSarea==107.74 & hhid==207 & plotid==1
					replace gps_perimeter=82.34 if gps_perimeter==8234 & hhid==1735 & plotid==1
					replace gps_perimeter=88.8 if gps_perimeter==888 & hhid==2803 & plotid==3
					replace gps_perimeter=. if gps_perimeter>400 & hhid==2818 & plotid==2
					// NOTE(review): this rule requires gps_perimeter==3523 for hhid==3523, i.e. the
					// perimeter equals the household id — confirm this is the intended record.
					replace gps_perimeter=352.3 if gps_perimeter==3523 & hhid==3523 & plotid==2
					replace cr_perimeter=114.9 if cr_perimeter==. & hhid==241 & plotid==1
					replace gps_perimeter=88.8 if gps_perimeter==888 & hhid==2803 & plotid==4

					
					//GPS area and perimeter switched
					// Each pair writes the larger value to GPSarea and the smaller to gps_perimeter
					replace GPSarea=207 if GPSarea==62.6 & hhid==107 & plotid==1
					replace gps_perimeter=62.6 if gps_perimeter==207 & hhid==107 & plotid==1
					replace GPSarea=496.6 if GPSarea==91 & hhid==2818 & plotid==2
					replace gps_perimeter=91 if gps_perimeter==496.6 & hhid==2818 & plotid==2
					replace GPSarea=381.3 if GPSarea==75 & hhid==3517 & plotid==2
					replace gps_perimeter=75 if gps_perimeter==381.3 & hhid==3517 & plotid==2
					replace GPSarea=1388.9 if GPSarea==150 & hhid==136 & plotid==2
					replace gps_perimeter=150 if hhid==136 & plotid==2
					replace GPSarea=517.4 if GPSarea==103 & hhid==2615 & plotid==1
					replace gps_perimeter=103 if hhid==2615 & plotid==1
					replace GPSarea=2100.2 if GPSarea==180 & hhid==3512 & plotid==1
					replace gps_perimeter=180 if hhid==3512 & plotid==1
					replace GPSarea=1341.6 if GPSarea==139 & hhid==3026 & plotid==1
					replace gps_perimeter=139 if hhid==3026 & plotid==1

	**Convert Areas to Acres
	// Self-reported (SR), GPS and compass-and-rope (CR) areas are expressed in acres.
	// Unit code 9 is set to missing, and the unit is blanked wherever the quantity is missing.
	replace SR_unit=. if SR_unit==9
	replace SR_unit=. if SR_quant==.
	
			//46 missing SR areas from Eneo Form, fill in with SR area from questionnaire
			// The questionnaire file is keyed on cs_5/plot_id, so the key is renamed for the merge
			// and renamed back afterwards. keep(match) retains only plots found in both files
			// (the former "keep if _merge==3") and nogenerate suppresses _merge, so nothing
			// relies on abbreviating _merge.
			ren hhid cs_5
			ren plotid plot_id
			merge 1:1 cs_5 plot_id using "$zanzihh/HH_All_S4.dta", ///
				keepusing(s4_1 s4_3a s4_3u) keep(match) nogenerate
			
			ren cs_5 hhid
			ren plot_id plotid
			*br hhid plotid am6 s4_1 s4_3a s4_3u if SR_quant==.
				// Questionnaire values are used only where the area form has none
				replace SR_quant=s4_3a if SR_quant==.
				replace SR_unit=s4_3u if SR_unit==.
				drop s4_1 s4_3a s4_3u
			
			// Clean Up
			// Record-specific corrections to GPS/CR values
			replace GPSarea=3525.09 if GPSarea==35250.09 & hhid==2314 & plotid==1
			replace gps_perimeter=239.4 if gps_perimeter==2394 & hhid==2314 & plotid==1
			replace GPSarea=2330.9 if GPSarea==233.09 & hhid==724 & plotid==1
			replace GPSarea=803.3 if GPSarea==8033 & hhid==2535 & plotid==2
			replace GPSarea=264.95 if GPSarea==2640.95 & hhid==2803 & plotid==4
			replace GPSarea=400.2 if GPSarea==4002 & hhid==3530 & plotid==1
			replace GPSarea=2063.6 if GPSarea==263.6 & hhid==3624 & plotid==1 // uncertain
			replace GPSarea=1889.7 if GPSarea==18889.7 & hhid==2117 & plotid==1
			replace GPSarea=1300.4 if GPSarea==13004 & hhid==335 & plotid==1
			replace CRarea=1319.6 if hhid==727 & plotid==1 // recalculated w/bearings
			replace CRarea=964 if hhid==501 & plotid==3 // recalculated w/bearings
			replace CRarea=1321.1 if hhid==3105 & plotid==2 // recalculated w/bearings
			replace closingerr=10.6 if hhid==3105 & plotid==2 // recalculated w/bearings
			replace CRarea=629.9 if hhid==708 & plotid==2 // recalculated w/bearings
			replace CRarea=718.4 if hhid==1314 & plotid==2 // recalculated w/bearings
			replace CRarea=1663.4 if hhid==1819 & plotid==1 // recalculated w/bearings
			replace CRarea=776.7 if hhid==2426 & plotid==1 // recalculated w/bearings
			replace CRarea=1041.7 if hhid==2109 & plotid==1 // recalculated w/bearings
			replace cr_perimeter=241.7 if hhid==3624 & plotid==1 // recalculated w/bearings
			replace CRarea=1687.1 if hhid==836 & plotid==1 // recalculated w/bearings

	// SR_unit==2 is taken as acres as reported; SR_unit==1 is multiplied by the
	// square-metre-to-acre factor. Any other unit leaves SR_acres missing.
	gen SR_acres=SR_quant if SR_unit==2
	replace SR_acres=SR_quant*0.0002471053814672 if SR_unit==1
	// 19 obs missing SR area
		
	// GPSarea and CRarea are converted with the same square-metre-to-acre factor
	foreach x in GPS CR {
	replace `x'area=. if `x'area==0
	gen `x'_acres=`x'area*0.0002471053814672
	}
	
	
************************
**Measurement Duration**
************************	
	
			//Clean Up
			// Hand corrections to the compass-and-rope (CR) clock times.
			// am25a / am25b are the start / end codes written as HMM (554 = 5:54); am25ah/am25am and
			// am25bh/am25bm are the matching hour and minute fields. The duration computed further
			// down uses the hour and minute fields, so each correction sets them alongside the code.
			// Some start & end times appear to be switched (comparing GPS & CR times)
			replace am25a=554 if hhid==110 & plotid==2
			replace am25ah=5 if hhid==110 & plotid==2
			replace am25am=54 if hhid==110 & plotid==2
			replace am25b=835 if hhid==110 & plotid==2
			replace am25bh=8 if hhid==110 & plotid==2
			replace am25bm=35 if hhid==110 & plotid==2
			
			replace am25a=138 if hhid==1330 & plotid==1
			replace am25ah=1 if hhid==1330 & plotid==1
			replace am25am=38 if hhid==1330 & plotid==1
			replace am25b=403 if hhid==1330 & plotid==1
			replace am25bh=4 if hhid==1330 & plotid==1
			replace am25bm=3 if hhid==1330 & plotid==1
			
			replace am25a=321 if hhid==2030 & plotid==1
			replace am25ah=3 if hhid==2030 & plotid==1
			replace am25am=21 if hhid==2030 & plotid==1
			replace am25b=457 if hhid==2030 & plotid==1
			replace am25bh=4 if hhid==2030 & plotid==1
			replace am25bm=57 if hhid==2030 & plotid==1
				
			replace am25a=445 if hhid==2106 & plotid==1
			replace am25ah=4 if hhid==2106 & plotid==1
			replace am25am=45 if hhid==2106 & plotid==1
			replace am25b=530 if hhid==2106 & plotid==1
			replace am25bh=5 if hhid==2106 & plotid==1
			replace am25bm=30 if hhid==2106 & plotid==1
				
			replace am25a=414 if hhid==3334 & plotid==1
			replace am25ah=4 if hhid==3334 & plotid==1
			replace am25am=14 if hhid==3334 & plotid==1
			replace am25b=505 if hhid==3334 & plotid==1
			replace am25bh=5 if hhid==3334 & plotid==1
			replace am25bm=5 if hhid==3334 & plotid==1
			
			replace am25a=336 if hhid==3532 & plotid==2
			replace am25ah=3 if hhid==3532 & plotid==2
			replace am25am=36 if hhid==3532 & plotid==2
			replace am25b=530 if hhid==3532 & plotid==2
			replace am25bh=5 if hhid==3532 & plotid==2
			replace am25bm=30 if hhid==3532 & plotid==2
			
			// Remaining corrections touch only the component (hour or minute) that was wrong
			replace am25a=401 if hhid==2007 & plotid==1
			replace am25am=1 if hhid==2007 & plotid==1
			replace am25b=421 if hhid==2007 & plotid==1
			replace am25bm=21 if hhid==2007 & plotid==1
			replace am25a=404 if hhid==2515 & plotid==3
			replace am25am=4 if hhid==2515 & plotid==3
			replace am25b=420 if hhid==2515 & plotid==3
			replace am25bm=20 if hhid==2515 & plotid==3
			replace am25a=208 if hhid==2735 & plotid==3
			replace am25am=8 if hhid==2735 & plotid==3
			replace am25b=255 if hhid==2735 & plotid==3
			replace am25bm=55 if hhid==2735 & plotid==3
			replace am25a=404 if hhid==3016 & plotid==2
			replace am25am=4 if hhid==3016 & plotid==2
			replace am25b=451 if hhid==3016 & plotid==2
			replace am25bm=51 if hhid==3016 & plotid==2
			replace am25a=519 if hhid==3309 & plotid==1
			replace am25am=19 if hhid==3309 & plotid==1
			replace am25b=540 if hhid==3309 & plotid==1
			replace am25bm=40 if hhid==3309 & plotid==1
			replace am25a=514 if hhid==202 & plotid==3
			replace am25ah=5 if hhid==202 & plotid==3
			replace am25a=103 if hhid==204 & plotid==3 	
			replace am25ah=1 if hhid==204 & plotid==3 	
			replace am25a=509 if hhid==604 & plotid==3 	
			replace am25ah=5 if hhid==604 & plotid==3 
			replace am25am=9 if hhid==604 & plotid==3 
			replace am25b=523 if hhid==713 & plotid==2 
			replace am25bh=5 if hhid==713 & plotid==2 
			replace am25b=830 if hhid==815 & plotid==2 
			replace am25bm=30 if hhid==815 & plotid==2 
			replace am25b=150 if hhid==1502 & plotid==2 
			replace am25bh=1 if hhid==1502 & plotid==2 
			replace am25a=347 if hhid==2036 & plotid==1 
			replace am25ah=3 if hhid==2036 & plotid==1  
			replace am25a=532 if hhid==2214 & plotid==1 
			replace am25ah=5 if hhid==2214 & plotid==1 
			replace am25a=459 if hhid==2832 & plotid==1 
			replace am25ah=4 if hhid==2832 & plotid==1 
			replace am25a=316 if hhid==3030 & plotid==2 
			replace am25ah=3 if hhid==3030 & plotid==2 
			replace am25a=216 if hhid==3306 & plotid==2 
			replace am25ah=2 if hhid==3306 & plotid==2 
			replace am25a=222 if hhid==3335 & plotid==2 
			replace am25ah=2 if hhid==3335 & plotid==2 
			replace am25a=422 if hhid==3632 & plotid==2 
			replace am25ah=4 if hhid==3632 & plotid==2 

			//GPS times
			// Hand corrections to the GPS clock times. am26a / am26b are the start / end codes
			// written as HMM; am26ah/am26am and am26bh/am26bm are the matching hour and minute
			// fields. GPStime is computed from the hour and minute fields, so every correction
			// must leave them in agreement with the code.
			replace am26b=605 if am26b==506 & hhid==302 & plotid==1
			replace am26bh=6 if hhid==302 & plotid==1
			replace am26bm=5 if hhid==302 & plotid==1
			replace am26b=758 if am26b==358 & hhid==521 & plotid==1
			replace am26bh=7 if hhid==521 & plotid==1
			replace am26a=540 if am26a==840 & hhid==926 & plotid==3
			replace am26ah=5 if hhid==926 & plotid==3
			replace am26b=548 if am26b==448 & hhid==926 & plotid==3
			replace am26bh=5 if hhid==926 & plotid==3
			replace am26b=700 if am26b==100 & hhid==1423 & plotid==1
			replace am26bh=7 if hhid==1423 & plotid==1
			replace am26b=316 if am26b==106 & hhid==1912 & plotid==1
			replace am26bh=3 if hhid==1912 & plotid==1
			replace am26bm=16 if hhid==1912 & plotid==1
			replace am26b=833 if am26b==333 & hhid==2002 & plotid==1
			replace am26bh=8 if hhid==2002 & plotid==1
			replace am26b=730 if am26b==230 & hhid==3304 & plotid==1
			replace am26bh=7 if hhid==3304 & plotid==1
			replace am26a=535 if am26a==435 & hhid==3621 & plotid==1
			// Hour set to 5 to agree with the corrected start code 535. It was previously set
			// to 4, which contradicted the line above; every other rule in this list keeps the
			// hour field equal to the leading digit of the code.
			replace am26ah=5 if hhid==3621 & plotid==1
			replace am26b=357 if am26b==337 & hhid==224 & plotid==1
			replace am26bm=57 if hhid==224 & plotid==1 
			replace am26a=559 if am26a==659 & hhid==311 & plotid==1
			replace am26ah=5 if hhid==311 & plotid==1 
			replace am26b=540 if am26b==504 & hhid==328 & plotid==1
			replace am26bm=40 if hhid==328 & plotid==1 
			replace am26a=406 if hhid==616 & plotid==1
			replace am26am=6 if hhid==616 & plotid==1
			replace am26b=410 if hhid==616 & plotid==1
			replace am26bm=10 if hhid==616 & plotid==1
			replace am26b=549 if hhid==635 & plotid==2
			replace am26bm=49 if hhid==635 & plotid==2
			replace am26b=650 if hhid==910 & plotid==1
			replace am26bm=50 if hhid==910 & plotid==1
			replace am26a=457 if hhid==1411 & plotid==1
			replace am26ah=4 if hhid==1411 & plotid==1
			replace am26a=525 if hhid==1704 & plotid==1
			replace am26am=25 if hhid==1704 & plotid==1
			replace am26b=527 if hhid==1704 & plotid==1
			replace am26bm=27 if hhid==1704 & plotid==1
			replace am26a=752 if hhid==1722 & plotid==2
			replace am26ah=7 if hhid==1722 & plotid==2
			replace am26b=1012 if hhid==2417 & plotid==1
			replace am26bm=12 if hhid==2417 & plotid==1
			replace am26a=606 if hhid==2528 & plotid==1
			replace am26am=6 if hhid==2528 & plotid==1
			replace am26b=409 if hhid==2530 & plotid==1
			replace am26bh=4 if hhid==2530 & plotid==1
			replace am26a=840 if hhid==2611 & plotid==1
			replace am26ah=8 if hhid==2611 & plotid==1
			replace am26a=855 if hhid==2801 & plotid==4
			replace am26am=55 if hhid==2801 & plotid==4
			replace am26b=857 if hhid==2801 & plotid==4
			replace am26bm=57 if hhid==2801 & plotid==4
			replace am26b=304 if hhid==2803 & plotid==2
			replace am26bh=3 if hhid==2803 & plotid==2
			replace am26a=546 if hhid==3008 & plotid==2
			replace am26am=46 if hhid==3008 & plotid==2
			replace am26b=547 if hhid==3008 & plotid==2
			replace am26bm=47 if hhid==3008 & plotid==2
			replace am26a=551 if hhid==3302 & plotid==2
			replace am26am=51 if hhid==3302 & plotid==2
			replace am26b=552 if hhid==3302 & plotid==2
			replace am26bm=52 if hhid==3302 & plotid==2
			replace am26a=320 if hhid==3323 & plotid==2
			replace am26am=20 if hhid==3323 & plotid==2
			replace am26b=326 if hhid==3323 & plotid==2
			replace am26bm=26 if hhid==3323 & plotid==2
			replace am26b=500 if hhid==3518 & plotid==2
			replace am26bh=5 if hhid==3518 & plotid==2
			replace am26a=627 if hhid==3520 & plotid==1
			replace am26am=27 if hhid==3520 & plotid==1
			replace am26b=645 if hhid==3520 & plotid==1
			replace am26bm=45 if hhid==3520 & plotid==1
			replace am26b=235 if hhid==3630 & plotid==1
			replace am26bm=35 if hhid==3630 & plotid==1
			replace am26b=523 if hhid==504 & plotid==2
			replace am26bh=5 if hhid==504 & plotid==2
			replace am26b=549 if hhid==520 & plotid==1
			replace am26bh=5 if hhid==520 & plotid==1
			replace am26b=456 if hhid==723 & plotid==2
			replace am26bh=4 if hhid==723 & plotid==2
			replace am26a=526 if hhid==918 & plotid==3
			replace am26ah=5 if hhid==918 & plotid==3
			replace am26b=649 if hhid==1009 & plotid==1
			replace am26bh=6 if hhid==1009 & plotid==1
			replace am26b=605 if hhid==1315 & plotid==2
			replace am26bh=6 if hhid==1315 & plotid==2
			replace am26b=617 if hhid==1636 & plotid==1 
			replace am26bm=17 if hhid==1636 & plotid==1 
			replace am26b=700 if hhid==1801 & plotid==1
			replace am26bh=7 if hhid==1801 & plotid==1
			replace am26b=750 if hhid==1812 & plotid==1
			replace am26bh=7 if hhid==1812 & plotid==1
			replace am26b=410 if hhid==1906 & plotid==1
			replace am26bh=4 if hhid==1906 & plotid==1
			replace am26b=510 if hhid==2122 & plotid==1
			replace am26bh=5 if hhid==2122 & plotid==1
			replace am26b=557 if hhid==2203 & plotid==4
			replace am26bh=5 if hhid==2203 & plotid==4
			replace am26b=948 if hhid==2218 & plotid==1
			replace am26bh=9 if hhid==2218 & plotid==1
			replace am26b=850 if hhid==2536 & plotid==1
			replace am26bh=8 if hhid==2536 & plotid==1
			replace am26a=458 if hhid==2701 & plotid==4
			replace am26ah=4 if hhid==2701 & plotid==4
			replace am26b=510 if hhid==2701 & plotid==4
			replace am26bh=5 if hhid==2701 & plotid==4
			replace am26a=928 if hhid==2710 & plotid==2
			replace am26ah=9 if hhid==2710 & plotid==2
			replace am26am=28 if hhid==2710 & plotid==2
			replace am26a=547 if hhid==2810 & plotid==1
			replace am26ah=5 if hhid==2810 & plotid==1
			replace am26a=536 if hhid==2927 & plotid==1
			replace am26ah=5 if hhid==2927 & plotid==1
			replace am26a=659 if hhid==2927 & plotid==2
			replace am26ah=6 if hhid==2927 & plotid==2
			replace am26am=59 if hhid==2927 & plotid==2
			replace am26b=355 if hhid==3333 & plotid==1
			replace am26bh=3 if hhid==3333 & plotid==1	
			replace am26a=247 if hhid==3336 & plotid==2
			replace am26am=47 if hhid==3336 & plotid==2
			replace am26b=255 if hhid==3336 & plotid==2
			replace am26bh=2 if hhid==3336 & plotid==2
			replace am26a=506 if hhid==1803 & plotid==1
			replace am26ah=5 if hhid==1803 & plotid==1
			replace am26am=6 if hhid==1803 & plotid==1
			replace am26a=359 if hhid==3110 & plotid==2
			replace am26ah=3 if hhid==3110 & plotid==2
			replace am26am=59 if hhid==3110 & plotid==2

	**CR**
	// Compass-and-rope duration in hours, from the hour and minute fields.
	// Start/end codes of 0 or 9999 are treated as "time not recorded".
	replace am25a=. if am25a==0 | am25a==9999
	// The end code is cleared the same way as the start code and as both GPS codes
	// below. Without this line an unrecorded CR end time still produced a numeric
	// CRtime, because the missing-code check further down never fired for it.
	replace am25b=. if am25b==0 | am25b==9999
			
	gen x=am25am/60
	gen start=am25ah + x
	
	gen y=am25bm/60
	gen finish=am25bh + y
	
	gen CRtime=finish-start
	label var CRtime "CR time (hours)"
	count if CRtime<0 // 13 observations 
	// A negative duration is shifted by 12 hours, i.e. the times are read as a 12-hour
	// clock that wrapped between start and end
	replace CRtime=CRtime+12 if CRtime<0 & CRtime!=.
	replace CRtime=. if am25a==. | am25b==.
	
	**GPS**
	// GPS duration in hours, built the same way from the am26* fields
	replace am26a=. if am26a==0 | am26a==9999 
	replace am26b=. if am26b==0 | am26b==9999 

	gen xx=am26am/60
	gen gstart=am26ah + xx
	
	gen yy=am26bm/60
	gen gfinish=am26bh + yy
	
	gen GPStime=gfinish-gstart
	label var GPStime "GPS time (hours)"
	count if GPStime<0 // 2 observations 
	replace GPStime=GPStime+12 if GPStime<0 & GPStime!=.
	replace GPStime=. if am26a==. | am26b==.
	
		// Outliers for time
		replace GPStime=. if am26b==600 & hhid==2304 & plotid==2
		
	// Keep only the analysis variables; the helper variables created above
	// (x, y, start, finish, xx, yy, gstart, gfinish, gps*) are discarded here
	keep hhid plotid SR_quant SR_unit am5 am6 am8 cr_perimeter CRarea closingerr ///
		gps_perimeter GPSarea gps_accuracy num_satellite treecover weather enum_id ///
		int_month int_year sup_id SR_acres GPS_acres CR_acres CRtime GPStime district ward shehia	

// Park the cleaned plot-level file for the merge section
tempfile areas
save `areas'

***********************
** Number of Corners **
***********************

// Count the corners of each plot from the compass roster (one line per corner).
// The folder name is spelled "area_measurement", matching the path used to load
// AreaMeasurement.dta; the previous spelling "area_measurment" pointed to a
// different folder.
use "$zanzi/area_measurement/CompassRoster.dta", clear		

	ren am4 hhid
	ren am7 plotid
	drop if am12==. // 5 obs, blank lines
	drop if am10a=="" // 4 obs, blank lines
	
	// Drop bookkeeping variables so that fully identical roster lines can be detected
	drop N  n am12_m am11b_m am11a_m am10b_m am10a_m am7_m am4_m
	duplicates drop // 126
	duplicates drop hhid plotid am10a am10b, force // 215 obs
	duplicates drop hhid plotid am10a, force // 55 obs 
	//for this only interested in number of sides, not bearings/distances
	
	// After de-duplication am10a identifies a roster line within a plot, so the number
	// of lines per plot is the number of corners
	isid hhid plotid am10a
	bysort hhid plotid: egen num_corners=count(hhid)
	collapse (max) num_corners, by(hhid plotid)
	label var num_corners "number of corners in CR measurement"

tempfile sides
save `sides'

**********************
**HH Characteristics** 
**********************
	
	// Household size and head characteristics from the household roster (section 1).
	// Forward slash in the path: works on every platform and matches the other -use- lines.
	use "$zanzihh/HH_All_S1.dta", clear
	keep cs_5 s1_id s1_2 s1_4 s1_5 s1_10 s1_11 s1_12 
	ren cs_5 hhid
	
		// Household size is counted over all roster lines, before restricting to heads
		bysort hhid: egen hhsize=count(s1_id)
		
		keep if s1_5==1 // hh heads only
		duplicates tag hhid, gen(d) // 8 hh with 2 heads
		// Where two heads are listed, keep the one recorded as roster member 1
		drop if d>0 & s1_id!=1
		drop d
		
		// From here on every remaining line is a head (s1_5==1), so no further
		// condition on s1_5 is needed.
		gen head_female=(s1_2==2)
		
		replace s1_4=. if s1_4 <10 // Head age improbable // 4 changes
		// replace missing head age with average head age
		egen mean_age=mean(s1_4)
		replace s1_4=mean_age if s1_4==.
		
		gen head_age=s1_4
						
	// Education code s1_12 mapped to years of schooling; any other code, including
	// missing, is counted as 0 years.
	recode s1_12 (11=1) (12=2) (13=3) (14=4) (15=5) (16=6) (17=7) (18=8) ///
		(19 20=9) (21=10) (22=11) (23=12) (24 25=13) (31=14) (32 33=15) ///
		(34=16) (41=17) (42=18) (43=19) (44=20) (45=21) (else=0), gen(yrsed)
	
	gen head_yrsed=yrsed
			
	// Parentheses are the documented grouping operator for expressions.
	// Every code other than 5 and 9, including missing, counts as literate.
	gen head_literate=(s1_10!=5 & s1_10!=9)
	
	collapse (max) head_yrsed head_age head_female hhsize head_literate, by(hhid)
			label var hhsize "number of household members"
			label var head_female "female headed HH"
			label var head_age "HH head age (years)"
			label var head_yrsed "HH head yrs of education"
			label var head_literate "HH head can read or write"

	tempfile head
	save `head'

		
***************************
**  subjective plot info **
***************************

	*- D1/D2
	// Plot-level attributes from questionnaire section 4. Variable names are written out
	// in full so nothing depends on -set varabbrev- or on unique prefixes.
	use "$zanzihh/HH_All_S4.dta", clear
		ren s4_13 soilqual_sr
		replace soilqual_sr=. if soilqual_sr==9

		// title: s4_29 below 10 and not missing; collateral: s4_31 equal to 1
		gen title=(s4_29<10 & s4_29!=.)
			label variable title "HH has title for land"
		gen collateral=(s4_31==1)
			label variable collateral "HH has right to sell or use as collateral"
		gen prop=(title==1 | collateral==1)

		// Number of section-4 plots listed for the household
		bysort cs_5: egen num_cult_fields=count(plot_id)
		
		// s4_5h with code 999 set to missing
		gen dist_home = s4_5h
			replace dist_home=. if dist_home==999
			
		ren cs_5 hhid
		keep hhid plot_id soilqual_sr title collateral prop num_cult_fields dist_home
		
		ren plot_id plotid
		tempfile plotatt
		save `plotatt'
		
*********************
** Merge plot info **
*********************

// Assemble the analysis file: start from the cleaned area measurements and attach
// corner counts, plot attributes and household-head characteristics.
// -clear- lets the load succeed even if the data in memory have unsaved changes.
use `areas', clear

// keep(master match) retains every plot in the area file and discards records found
// only in the using file (the former "drop if _merge==2"); nogenerate suppresses
// _merge so nothing relies on abbreviating it.
merge 1:1 hhid plotid using `sides', keep(master match) nogenerate

merge 1:1 hhid plotid using `plotatt', keep(master match) nogenerate
	
merge m:1 hhid using `head', keep(master match) nogenerate
	
// Survey identifier; the value label also lists the other surveys of the study
gen survey=2
label define SURVEY 1 "MLASS Ethiopia" 2 "Zanzibar Experiment" 3 "Nigeria Experiment" 4 "LSMS-ISA Malawi 2010/11"
label val survey SURVEY

gen GPS_model="Garmin eTrex 30"

** KEEP OBS WITH GPS & CR MEASUREMENT **
drop if GPS_acres==. | CR_acres==. // 4 dropped

*************************
**Gen LEVELS of CR area**
*************************
	// Two six-class size groupings are built, first from the CR area and then from the
	// GPS area: level_cr / level_gps use round metric cut points expressed in acres,
	// level_cr_acres / level_gps_acres use round acre cut points.
	foreach src in CR GPS {
		local tag = lower("`src'")

		gen level_`tag'=.
		replace level_`tag'=1 if `src'_acres<0.06177634536679  									// 250 sq. meters / 0.025 ha
		replace level_`tag'=2 if `src'_acres>=0.06177634536679 & `src'_acres<0.1235526907336 	// 500 sq. meters / 0.05 ha
		replace level_`tag'=3 if `src'_acres>=0.1235526907336 & `src'_acres<0.3706580722008 	// 1500 sq. meters / 0.15 ha
		replace level_`tag'=4 if `src'_acres>=0.3706580722008 & `src'_acres<0.6177634536679 	// 2500 sq. meters / 0.25 ha
		replace level_`tag'=5 if `src'_acres>=0.6177634536679 & `src'_acres<1.235526907336  	// 5000 sq. meters / 0.5 hectare
		replace level_`tag'=6 if `src'_acres>=1.235526907336  & `src'_acres!=. 

		//levels in ACRES
		gen level_`tag'_acres=.
		replace level_`tag'_acres=1 if `src'_acres<0.05
		replace level_`tag'_acres=2 if `src'_acres>=0.05 & `src'_acres<0.15
		replace level_`tag'_acres=3 if `src'_acres>=0.15 & `src'_acres<0.35
		replace level_`tag'_acres=4 if `src'_acres>=0.35 & `src'_acres<0.75
		replace level_`tag'_acres=5 if `src'_acres>=0.75 & `src'_acres<1.25
		replace level_`tag'_acres=6 if `src'_acres>=1.25  & `src'_acres!=. 
	}

	// Value labels name each class by its upper bound
	label define level_acres 1 "<0.05 acres" 2 "<0.15 acres" 3 "<0.35 acres" ///
		4 "<0.75 acres" 5 "<1.25 acres" 6 ">=1.25 acres"
	foreach v of varlist level_cr_acres level_gps_acres {
		label val `v' level_acres
	}

	label define level 1 "<250 sq. meters / 0.025 ha" 2 "<500 sq. meters / 0.05 ha" 3 "<1500 sq. meters / 0.15 ha" ///
		4 "<2500 sq. meters / 0.25 ha" 5 "<5000 sq. meters / 0.5 hectare" 6 ">=5000 sq. meters / 0.5 hectare"
	foreach v of varlist level_cr level_gps {
		label val `v' level
	}

**************************
** Gen "bias" variables **
**************************

	// Differences between measurement methods, in acres.
	// bias_gps: self-report minus GPS. bias_cr: GPS minus compass-and-rope.
	gen bias_gps = SR_acres-GPS_acres
	label var bias_gps "SR_acres-GPS_acres"		

	gen bias_cr = GPS_acres-CR_acres
	label var bias_cr "GPS_acres-CR_acres"		

	// Absolute and relative (percent of CR area) versions of bias_cr. Variable names are
	// written out in full: the bare "abs" used before resolved to abs_bias_cr only as long
	// as it was the sole variable starting with "abs", and is easily misread as the abs() function.
	gen abs_bias_cr=abs(bias_cr)
	gen per_bias_cr=(bias_cr/CR_acres)*100
	gen abs_per_cr=(abs_bias_cr/CR_acres)*100	
		label var abs_bias_cr "absolute val. GPS - CR (acres)"
		label var per_bias_cr "relative bias (bias_cr/CR_acre * 100)"
		label var abs_per_cr "absolute val. relative bias, (|bias_cr|/CR_acre * 100)"

***************************************
**Impute missings for select variables*
***************************************
	
	**By Plot size
	//Number of corners missing for 4 obs 
	//CRtime missing for 2 obs 
	//GPStime missing for 8 obs 
	// Durations: a missing value takes the mean of its CR size class
	foreach v of varlist CRtime GPStime {
		tempvar classmean
		bysort level_cr: egen `classmean'=mean(`v')
		replace `v'=`classmean' if `v'==.
		drop `classmean'
	}
	// Corners: a missing value takes the mode of its CR size class
	tempvar classmode
	bysort level_cr: egen `classmode'=mode(num_corners)
	replace num_corners=`classmode' if num_corners==.
	drop `classmode'
	
	**By geographic area
	//treecover missing for 2 obs
	//weather missing for 1 obs
	// Site conditions: a missing value takes the mode within the same shehia
	foreach v of varlist treecover weather {
		tempvar areamode
		bysort shehia: egen `areamode'=mode(`v')
		replace `v'=`areamode' if `v'==.
		drop `areamode'
	}
	
	// Three-class grouping of the corner count: up to 4, 5 to 9, 10 or more
	gen level_corner=cond(num_corners<=4, 1, cond(num_corners<10, 2, cond(num_corners!=., 3, .)))
	label define CORNERS 1 "<= 4 sides" 2 "5 - 9 sides" 3 ">= 10 sides"
	label val level_corner CORNERS
	
	// Three-class grouping of the satellite count: up to 15, 16 to 19, 20 or more
	gen level_sat=cond(num_satellite<=15, 1, cond(num_satellite<20, 2, cond(num_satellite!=., 3, .)))
	label define SATELLITES 1 "<=15 satellites" 2 "16 - 19 satellites" 3 ">= 20 satellites"
	label val level_sat SATELLITES

*************************************
** Gen aggregated SR unit variable **
*************************************

	// Aggregated self-report unit: SR_unit 2 maps to class 1 (Acre), SR_unit 1 to class 3
	// (Sq. Meter); anything else stays missing.
	label define SR_UNIT_AG 1 "Acre" 2 "Hectare" 3 "Sq. Meter" 4 "Non-Standard Unit"
	codebook SR_unit
	gen SR_unit_ag=cond(SR_unit==2, 1, cond(SR_unit==1, 3, .))
	label val SR_unit_ag SR_UNIT_AG
	
	// 1 when the aggregated unit is below class 4; a missing unit gives 0
	gen standard_unit=(SR_unit_ag<4)
	label var standard_unit "SR in standard unit"
	
**Time in Minutes**
	// Durations were computed in hours; add minute versions
	foreach dur of varlist GPStime CRtime {
		gen `dur'_min=`dur'*60
		label var `dur'_min "Measurement time (minutes)"
	}
	
	// Weather collapsed to two classes: codes 1-2 versus codes 3-6
	gen weather2=cond(inlist(weather, 1, 2), 1, cond(inlist(weather, 3, 4, 5, 6), 2, .))
	label define WEATHER2 1 "Clear/Partly Cloudy" 2 "Mostly Cloudy/All Cloudy/Rainy"
	label values weather2 WEATHER2
	label var weather2 "weather collapsed"
	
	// Square and cube of the CR area
	gen CR2=CR_acres^2
	gen CR3=CR_acres^3
***********************
**Merge Shape Metrics**
***********************	
	
/*	merge 1:1 hhid plotid using "$shapemetrics"
	drop if _m==2 
*/
	
*******************************************
**TRIM TOP 1% OF ABSOLUTE VALUE OF % BIAS**
*******************************************	
	
	// Absolute and relative (percent of GPS area) versions of bias_gps.
	// GPS_acres is written out in full rather than abbreviated.
	gen abs_bias_gps=abs(bias_gps)
	gen per_bias_gps=(bias_gps/GPS_acres)*100
	gen abs_per_gps=(abs_bias_gps/GPS_acres)*100
	
	// Flag observations below the 1st and above the 99th percentile of each absolute
	// percent-bias measure; r(p1) and r(p99) come from the preceding detailed summarize.
	sum abs_per_cr, d
	gen flagp1_cr=1 if abs_per_cr<r(p1)
	gen flagp99_cr=1 if abs_per_cr>r(p99) & abs_per_cr!=.

	sum abs_per_gps, d
	gen flagp1_gps=1 if abs_per_gps<r(p1)
	gen flagp99_gps=1 if abs_per_gps>r(p99) & abs_per_gps!=.

	// Only the top tail is trimmed; the bottom-tail flags are kept in the data but not used here
	drop if flagp99_cr==1 | flagp99_gps==1
	// DROPS 37 obs 

count //1908	

*save "$zanzi/CheaperFaster_Zanzibar.dta", replace


